This will be illustrated using the yields of US Treasury 10-year bonds. Yields should fall as demand for these bonds increases, which in turn increases demand for the US dollar and pushes its value up.
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import scipy as sp
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf
from statsmodels.tsa.stattools import grangercausalitytests
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
Data source: https://www.kaggle.com/brunotly/foreign-exchange-rates-per-dollar-20002019
Downloaded: 16th March 2020
# Read the exchange-rate data (drop the unnamed index column), mark the
# source's 'ND' placeholders as missing, and remove incomplete rows.
fx = (
    pd.read_csv("FX_rates.csv", usecols=list(range(1, 24)))
    .replace('ND', np.nan)
)
# Parse the date column
fx['Time Serie'] = pd.to_datetime(fx['Time Serie'], format='%Y-%m-%d')
# Keep only rows with no missing observations
fx = fx.dropna()
# The rate columns were read as strings (because of 'ND'); cast to numeric
fx.iloc[:, 1:23] = fx.iloc[:, 1:23].apply(pd.to_numeric)
## List of currency codes (every column after the date column)
curr_code = fx.columns[1:]
# Column names look like "COUNTRY - CURRENCY/US$"; keep the country part
country = [code.split(" -")[0] for code in curr_code]
# List of colours for countries — one entry per currency column, in the same
# order as `curr_code` (22 entries; note 'royalblue' appears twice).
color = ['goldenrod', 'royalblue', 'black', 'mediumblue', 'gold',
'olive', 'red', 'mediumvioletred', 'deepskyblue', 'purple',
'green', 'seagreen', 'lightgreen', 'slategrey', 'rosybrown',
'royalblue', 'darkkhaki', 'cornflowerblue', 'maroon', 'mediumpurple',
'turquoise', 'navy']
# Indexed fx rates from 2007-01-02 to 2009-12-31
fx_index = fx.iloc[1759:2514,].copy()
# Rebase each currency so the first day of the window equals 100
for pos, code in enumerate(curr_code):
    fx_index[code] = fx_index[code] / fx_index.iloc[0, 1 + pos] * 100
Data source: https://fred.stlouisfed.org/series/DGS10
Downloaded: 18th March 2020
# Load the 10-year Treasury yield series (the first 13 rows are metadata)
us_yield = pd.read_csv('US_yield.csv', skiprows=13)
# Parse the date column
us_yield['date'] = pd.to_datetime(us_yield['date'], format='%Y-%m-%d')
# Restrict to the crisis window, 2007-01-02 to 2009-12-31
us_yield_crisis = us_yield.iloc[11231:11983, :].copy()
This plot shows currencies against the US dollar from 2007-01-02 to 2009-12-31. The exchange rates are indexed at 2007-01-02.
# Create a figure with a secondary y-axis: indexed exchange rates on the
# left axis, the US Treasury yield on the right axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
# US 10-year Treasury yield trace (right-hand axis)
fig.add_trace(go.Scatter(x = list(us_yield_crisis.date),
                         y = list(us_yield_crisis[' value']),
                         name = "<b>US 10 year Treasury yield</b>",
                         line = dict(color = 'black')),
              secondary_y = True
              )
# One trace per currency (left-hand axis), hidden until selected in the
# legend. enumerate replaces the original unused loop variable + manual
# counter.
for n, code in enumerate(curr_code):
    fig.add_trace(go.Scatter(x = list(fx_index['Time Serie']),
                             y = list(fx_index[code]),
                             name = country[n],
                             line = dict(color = color[n],
                                         width = 1.5),
                             visible = 'legendonly'),
                  secondary_y = False,
                  )
# Add title (typo "Treaury" fixed) and change background colour
fig.update_layout(
    title_text = "US Treasury 10 year yield",
    plot_bgcolor = 'whitesmoke'
)
# Set y-axes titles
fig.update_yaxes(title_text = "Indexed $ exchange rate",
                 secondary_y = False)
fig.update_yaxes(title_text = "US 10 year yield %",
                 secondary_y = True)
fig.show()
Data source: https://data.worldbank.org/indicator/BN.CAB.XOKA.GD.ZS
Downloaded: 18th March 2020
# Load current-account balance (% of GDP); the first 4 rows are metadata.
# Keep the country-name column plus the year columns.
current_ac = pd.read_csv('Current_account.csv',
                         skiprows=4,
                         usecols=[0] + list(range(44, 63)))
# Row positions of the countries matching the fx data frame
country_index = [11, 66, 178, 79, 27,
                 33, 38, 94, 107, 124,
                 152, 261, 206, 56, 117,
                 167, 175, 221, 136, 35,
                 231]
# Keep the selected countries and the 2007 column only
current_ac = current_ac.iloc[country_index, [0, 8]]
Data source: https://data.worldbank.org/indicator/NY.GDP.MKTP.CD
Downloaded: 18th March 2020
# Load GDP (current US$); the first 4 rows are metadata
gdp = pd.read_csv('gdp.csv',
                  skiprows=4,
                  usecols=[0] + list(range(44, 63)))
# Keep the selected countries and the 2007 column only
gdp = gdp.iloc[country_index, [0, 8]]
Data source: https://data.worldbank.org/indicator/FI.RES.XGLD.CD
Downloaded: 19th March 2020
# Load FX reserves data; the first 3 rows are metadata
res = pd.read_csv("fx_reserves.csv",
                  skiprows=3,
                  usecols=[0] + list(range(44, 63)))
# Keep the selected countries and the 2007 column only
res = res.iloc[country_index, [0, 8]]
# Reserves as a share of GDP (division aligns on the shared year label)
res_ratio = res.iloc[:, 1:].div(gdp.iloc[:, 1:])
# Put the country names back as the first column
res_ratio.insert(loc=0,
                 column='Country',
                 value=res['Country Name'])
# Select exchange rates from 2008-07-01 and 2009-01-30
# (positions 2136 and 2281 in the cleaned fx frame)
percent_change = fx.iloc[[2136,2281], 1:].copy()
# Calculate the percentage change between the two dates for each currency.
# NOTE(review): the column selection range(0,20) + [21] skips position 20 —
# presumably a currency without World Bank coverage; confirm which one.
percent_change = percent_change.pct_change().iloc[1:, list(range(0,20)) + [21]]
# Add columns with country names, reserves/GDP and current account.
# NOTE(review): assigning from lists assumes the row order of current_ac and
# res_ratio matches the column order of the fx frame — verify the alignment.
percent_change = percent_change.T
percent_change['Country'] = list(current_ac['Country Name'])
percent_change['res_GDP'] = list(res_ratio['2007'])
percent_change['curr_ac'] = list(current_ac['2007'])
# Rename columns
percent_change.columns = ['per_ch', 'Country', 'res_GDP', 'curr_ac']
# Reserves/GDP vs crisis-period currency move, with fitted regression line
sns.regplot(data=percent_change, x='res_GDP', y='per_ch')
plt.xlabel('Ratio of FX reserves to GDP');
plt.ylabel('% change in exchange rate');
# Current-account balance vs crisis-period currency move
sns.regplot(data=percent_change, x='curr_ac', y='per_ch')
plt.xlabel('Current account %');
plt.ylabel('% change in exchange rate');
# Row positions of the 20 countries in the World Bank files, sorted ascending.
# NOTE: this redefines the earlier 21-entry country_index used above.
country_index = [11, 27, 33, 35, 38,
56, 66, 79, 94, 107,
117, 124, 136, 152, 167,
175, 178, 206, 221, 261]
# Country names in alphabetical order (matches the sorted World Bank frames)
country_list = ['Australia', 'Brazil', 'Canada', 'China', 'Denmark',
'Euro area', 'Hong Kong', 'India', 'Japan', 'Korea, Rep.',
'Malaysia', 'Mexico', 'New Zealand', 'Norway', 'Singapore',
'South Africa', 'Sri Lanka', 'Sweden', 'Switzerland', 'United Kingdom']
# Short currency codes in the column order of the fx CSV (22 entries;
# redefines the earlier curr_code of full exchange-rate names)
curr_code = ['AUD', 'EUR', 'NZD', 'GBP', 'BRL',
'CAD', 'CNY', 'HKD', 'INR', 'KRW',
'MXN', 'ZAR', 'SGD', 'DKK', 'JPY',
'MYR', 'NOK', 'SEK', 'LKR', 'CHF',
'TWD', 'THB']
## Re-read the exchange-rate data, mark 'ND' placeholders as missing,
## and drop incomplete rows in one chain
fx = (
    pd.read_csv("FX_rates.csv", usecols=list(range(1, 24)))
    .replace('ND', np.nan)
    .dropna()
    .copy()
)
## Convert dates into date format
fx['Time Serie'] = pd.to_datetime(fx['Time Serie'], format='%Y-%m-%d')
## Cast the rate columns (read as strings because of 'ND') to floats
fx.iloc[:, 1:23] = fx.iloc[:, 1:23].apply(pd.to_numeric)
# List of exchange-rate column names
fx_names = list(fx.iloc[:, 1:].columns)
Percentage change in exchange rate from 1 July 2008 to 31 January 2009 (defined as the crisis period)
# Exchange rates on 2008-07-01 (row 2136) and 2009-01-30 (row 2281),
# first 20 currency columns only
percent_change = fx.iloc[[2136,2281],list(range(1,21))].copy()
# Percentage change between the two dates (keep the second row)
percent_change = percent_change.pct_change().iloc[1]
# Sort by the exchange-rate column name.
# NOTE(review): the bar labels come from the alphabetically sorted
# country_list — this assumes sorting the rate names yields the same country
# order (e.g. 'Korea, Rep.' vs 'KOREA - ...'); verify the pairing.
per_sort = percent_change.sort_index(ascending = True)
plt.bar(x = country_list, height = per_sort)
plt.xticks(rotation = 90);
plt.ylabel('Appreciation (0.5 = 50% increase)');
# Load current-account balance (% of GDP); the first 4 rows are metadata
current_ac = pd.read_csv('Current_account.csv',
skiprows = 4,
usecols = [0] + list(range(44,63)))
current_ac = current_ac.iloc[country_index,:].copy()
# Sort alphabetically so rows line up with per_sort (also sorted by name)
current_ac = current_ac.sort_values('Country Name').copy()
current_ac['per_ch'] = list(per_sort)
# Adding in column for average of current account over years 2004-2006
# (columns 5:8 are the 2004, 2005 and 2006 columns)
ca_avg = current_ac.iloc[:,5:8]
current_ac['ca_avg'] = ca_avg.mean(axis=1)
# Removing all irrelevant columns for regression (keep name, per_ch, ca_avg)
current_ac.drop(current_ac.columns[1:-2], axis =1, inplace = True)
# NOTE(review): this second sort is redundant — the frame was already sorted
# by 'Country Name' above.
current_ac = current_ac.sort_values(by='Country Name').copy()
current_ac = current_ac.rename({'Country Name': 'Country'}, axis=1)
# Resetting index of data frame (rows arranged alphabetically)
current_ac.reset_index(drop = True, inplace = True)
# Load GDP (current US$); the first 4 rows are metadata
gdp = pd.read_csv('gdp.csv',
skiprows = 4,
usecols = [0] + list(range(44,63)))
gdp = gdp.iloc[country_index,:].copy()
gdp = gdp.rename({'Country Name': 'Country'}, axis=1)
# Adding in column for avg GDP over years 2004-2006 (columns 5:8)
gdp_avg = gdp.iloc[:, 5:8]
gdp['gdp_avg'] = gdp_avg.mean(axis=1)
gdp = gdp.sort_values(by='Country').copy()
# Removing all irrelevant columns for regression in a new df called gdp_new
gdp_new = gdp.drop(gdp.columns[1:5], axis =1).copy()
# NOTE(review): gdp_2 does not appear to be used anywhere below — candidate
# for removal; confirm before deleting.
gdp_2 = gdp_new.drop(gdp_new.columns[4:-1], axis =1).copy()
# Country + gdp_avg only (NOTE: rebinds the name gdp_avg, which above held
# the 2004-2006 slice); merged into the regression frame later
gdp_avg = gdp.drop(gdp.columns[1:-1], axis =1).copy()
# Resetting index of data frame (rows arranged alphabetically)
gdp.reset_index(drop = True, inplace = True)
# GDP growth (annual %); the first 3 rows are metadata
gdp_growth = pd.read_csv("gdp_growth.csv",
                         skiprows=3,
                         usecols=[0] + list(range(44, 63)))
gdp_growth = gdp_growth.iloc[country_index, :].copy()
gdp_growth = gdp_growth.rename({'Country Name': 'Country'}, axis=1)
# Average growth over 2004-2006 (columns 5:8 are those years)
gdp_growth_avg = gdp_growth.iloc[:, 5:8]
gdp_growth['gdp_growth_avg'] = gdp_growth_avg.mean(axis=1)
# Keep only Country + the average, sorted alphabetically with a fresh index
gdp_growth = gdp_growth.drop(gdp_growth.columns[1:-1], axis=1).copy()
gdp_growth = gdp_growth.sort_values(by='Country').reset_index(drop=True)
# Load FX reserves (current US$); the first 3 rows are metadata
res = pd.read_csv("fx_reserves.csv",
skiprows = 3,
usecols = [0] + list(range(44,63)))
res = res.iloc[country_index,:].copy()
# Sort alphabetically to match the (sorted, reset-index) gdp frame
res = res.sort_values(by='Country Name').copy()
res.reset_index(drop = True, inplace = True)
# Scale res by GDP to produce ratio. The division aligns on the year column
# labels and the integer index; gdp.iloc[:,1:-1] excludes the gdp_avg column.
ratio = res.iloc[:,1:].div(gdp.iloc[:,1:-1])
ratio.insert(value = res['Country Name'],
loc = 0,
column = 'Country')
# Insert average of the ratio over years 2004-2006 (columns 5:8)
ratio_avg = ratio.iloc[:, 5:8]
ratio['res_avg'] = ratio_avg.mean(axis=1)
# Remove irrelevant columns for regression (keep Country + res_avg)
ratio = ratio.drop(ratio.columns[1:-1], axis =1).copy()
# Load real interest rates; the first 4 rows are metadata
ir = pd.read_csv("interest_rate.csv",
skiprows = 4,
usecols = [0] + list(range(44,63)))
ir = ir.iloc[country_index,:].copy()
ir = ir.rename({'Country Name': 'Country'}, axis=1)
# Insert average over years 2004-2006 (columns 5:8)
ir_avg = ir.iloc[:, 5:8]
ir['ir_avg'] = ir_avg.mean(axis=1)
# Missing data for Denmark, Euro, Switzerland, Sri Lanka and Norway
# (obtained from other data sources below)
# Remove irrelevant columns for regression (keep Country + ir_avg)
ir = ir.drop(ir.columns[1:-1], axis =1).copy()
ir = ir.sort_values(by='Country').copy()
ir.reset_index(drop = True, inplace = True)
# Inserting missing real interest rate values obtained from external sources.
# Row positions refer to the alphabetically sorted frame:
# 18 = Switzerland, 4 = Denmark, 13 = Norway, 5 = Euro area.
# NOTE(review): Sri Lanka is listed as missing above but no value is filled
# for it here — confirm whether its data was available after all.
ir.iloc[18,1] = 2.057
ir.iloc[4,1] = 0.770833333333
ir.iloc[13,1] = 0.99444444444
ir.iloc[5,1] = 0.152086111111
# Inflation (annual %); the first 4 rows are metadata
inf = pd.read_csv("inflation.csv",
                  skiprows=4,
                  usecols=[0] + list(range(44, 63)))
inf = inf.iloc[country_index, :].copy()
inf = inf.rename({'Country Name': 'Country'}, axis=1)
# Average inflation over 2004-2006 (columns 5:8 are those years)
inf_avg = inf.iloc[:, 5:8]
inf['inf_avg'] = inf_avg.mean(axis=1)
# Keep only Country + the average, sorted alphabetically with a fresh index
inf = inf.drop(inf.columns[1:-1], axis=1).copy()
inf = inf.sort_values(by='Country').reset_index(drop=True)
# Portfolio investment assets (CPIS), December 2004-2006 snapshots
assets = pd.read_csv("CPIS_assets.csv",
skiprows = 4,
usecols = [1,5,6,7])
# Rename to match the year labels used by the World Bank frames
assets.rename({'Unnamed: 1': 'Country', 'DEC. 2004' : '2004', 'DEC. 2005' : '2005', 'DEC. 2006' : '2006'}, axis=1, inplace = True)
assets = assets.sort_values(by='Country').copy()
# Values were read as strings; cast to numeric
assets.iloc[:,1:] = assets.iloc[:,1:].apply(pd.to_numeric)
assets.reset_index(drop = True, inplace = True)
# Divide assets by GDP; alignment is on the year labels '2004'-'2006'
# (gdp columns 5:8) and on the reset integer index
assets_new = assets.iloc[:,1:].div(gdp.iloc[:,5:8])
# Average asset/GDP ratio over 2004-2006
assets['assets_avg'] = assets_new.mean(axis=1)
# Remove irrelevant columns for regression (keep Country + assets_avg)
assets = assets.drop(assets.columns[1:-1], axis =1).copy()
# Portfolio investment liabilities (CPIS), December 2004-2006 snapshots
liab = pd.read_csv("CPIS_liab2.csv",
                   skiprows=4,
                   usecols=[1, 5, 6, 7])
# Rename to match the year labels used by the World Bank frames.
# NOTE(review): the key here is 'Unnamed: 0' whereas the assets file used
# 'Unnamed: 1' — confirm against the CSV header.
liab = liab.rename({'Unnamed: 0': 'Country', 'DEC. 2004': '2004',
                    'DEC. 2005': '2005', 'DEC. 2006': '2006'}, axis=1)
# BUG FIX: the original called liab.sort_values(by='Country') without
# assigning the result, so the frame was never actually sorted — and the
# positional division by the alphabetically sorted gdp frame below could
# pair the wrong countries. Sort after the rename and assign the result.
liab = liab.sort_values(by='Country').reset_index(drop=True)
# Values were read as strings; cast to numeric
liab.iloc[:, 1:] = liab.iloc[:, 1:].apply(pd.to_numeric)
# Divide liabilities by GDP; aligns on year labels '2004'-'2006'
# (gdp columns 5:8) and on the reset integer index
liab_new = liab.iloc[:, 1:].div(gdp.iloc[:, 5:8])
# Average liability/GDP ratio over 2004-2006
liab['liab_avg'] = liab_new.mean(axis=1)
# Remove irrelevant columns for regression (keep Country + liab_avg)
liab = liab.drop(liab.columns[1:-1], axis=1).copy()
# Build the regression frame: one row per country, one column per predictor.
# NOTE(review): the first two and last two merges use how="outer" while the
# middle three default to inner joins — if any country name mismatches, rows
# could be silently dropped or NaN-filled inconsistently; consider one
# consistent join strategy and a row-count assertion.
reg = pd.merge(current_ac, gdp_avg, on = "Country", how = "outer")
reg = pd.merge(reg, ratio, on = "Country", how = "outer")
reg = pd.merge(reg, ir, on = "Country")
reg = pd.merge(reg, inf, on = "Country")
reg = pd.merge(reg, gdp_growth, on = "Country")
reg = pd.merge(reg, liab, on = "Country", how = "outer")
reg = pd.merge(reg, assets, on = "Country", how = "outer")
# OLS of crisis-period currency appreciation on the 2004-06 averages
results_reg = smf.ols('per_ch ~ gdp_growth_avg + ca_avg + res_avg + ir_avg + inf_avg + liab_avg + assets_avg',
data = reg).fit()
print(results_reg.summary())
Remove currency peggers from regression (China)
Remove safe haven currencies (Japan, Hong Kong)
# Drop China (currency pegger) plus Hong Kong and Japan (safe havens) in one
# step — labels 3, 6 and 8 in the alphabetically ordered regression frame
reg2 = reg.drop(index=[reg.index[3], reg.index[6], reg.index[8]]).copy()
# Re-run the OLS without those countries
results_reg2 = smf.ols('per_ch ~ gdp_growth_avg + ca_avg + res_avg + inf_avg + ir_avg + liab_avg + assets_avg',
                       data = reg2).fit()
print(results_reg2.summary())
Strong problems of multicollinearity --> insignificance
Variance Inflation Factor (remove variables that are closely correlated)
Partial Least Squares Regression
This will be investigated using time series analysis.
# Rename columns to the short currency codes for convenience
fx.columns = ['Date'] + curr_code
# Daily UK exchange rates, Jan 2005 to Jan 2009 inclusive (column 4 = GBP)
fx_uk = fx.iloc[range(1257, 2282), [0, 4]]
# Collapse to monthly averages over the same window
fx_uk = fx_uk.set_index('Date').resample('M').mean()
The use of short-term interest rate data is motivated by two factors:
a) sources have mentioned that the unwinding of carry trades may be able to explain exchange rate movements during the global financial crisis, and
b) monthly data is available for it at https://data.oecd.org/interest/short-term-interest-rates.htm.
# Monthly real short-term interest rates, Jan 2005 to Jan 2009 inclusive
# (49 monthly observations; columns 5 and 6 are TIME and Value)
ir_uk = pd.read_csv("UK_STrates.csv", usecols=[5, 6]).iloc[:49, :]
# Convert the TIME column into pd.Datetime objects
ir_uk['TIME'] = pd.to_datetime(ir_uk['TIME'], format='%Y-%m-%d')
We want the data to be stationary; in the regression context stationarity is important because the results that apply to independent data also hold when the data are stationary.
The mean and variance of a stationary time series are constant over time. Here is a comparison between a stationary and non-stationary series:
Image source: https://upload.wikimedia.org/wikipedia/commons/e/e1/Stationarycomparison.png
A variable with a time trend is highly likely to be correlated with another variable that has a time trend. For example, a person A's weight from birth to age 20 is going to be highly correlated with person B's weight from birth to age 20. This is true for the vast majority of any two people randomly selected from the global population. But obviously, person A's weight does not 'cause' and is not 'caused by' person B's weight.
Making a series stationary helps to reduce the possibility of detecting such a spurious correlation.
Two common ways of achieving this are:
1) Taking the logarithm of the series and
2) taking the first difference of the series (difference of consecutive values).
plt.rcParams.update({'figure.figsize':(9,5), 'figure.dpi':120})
# Three stacked panels: the raw series, its log, and its first difference.
# Only the bottom panel keeps x ticks, to improve readability.
panels = [
    (fx_uk['GBP'],         'USD/GBP',             'GBP'),
    (np.log(fx_uk['GBP']), 'log(USD/GBP)',        None),
    (fx_uk['GBP'].diff(),  'Differenced USD/GBP', None),
]
for row, (series, title, ylabel) in enumerate(panels, start=1):
    plt.subplot(3, 1, row)
    plt.plot(fx_uk.index, series)
    if ylabel is not None:
        plt.ylabel(ylabel)
    if row < 3:
        plt.xticks([])
    else:
        plt.xlabel('Date')
    plt.title(title)
plt.show()
Differencing makes the series noticeably more stationary. There are more quantitative methods to measure this, which is a possible extension. Arguably, the mean and/or variance increase towards the end of the period, so it may be worth restricting the period considered.
# Same three-panel diagnostic for the UK short-term interest-rate series:
# raw, log, and first difference. Only the bottom panel keeps x ticks.
panels = [
    (ir_uk['Value'],         'UK Short-term interest rates',             '%'),
    (np.log(ir_uk['Value']), 'log(UK Short-term interest rates)',        None),
    (ir_uk['Value'].diff(),  'Differenced UK Short-term interest rates', None),
]
for row, (series, title, ylabel) in enumerate(panels, start=1):
    plt.subplot(3, 1, row)
    plt.plot(fx_uk.index, series)
    if ylabel is not None:
        plt.ylabel(ylabel)
    if row < 3:
        plt.xticks([])
    else:
        plt.xlabel('Date')
    plt.title(title)
plt.show()
Differencing appears to make the series more stationary, but this is less obvious than before. Here, we can say with more confidence that the mean decreases and the variance increases towards the end of the period, so an extension would be to restrict the period considered, perhaps from 2004-2007, since this would be entirely pre-crisis.
# Correlation between the differenced series (drop the NaN .diff() creates)
corr_uk = sp.stats.pearsonr(fx_uk['GBP'].diff()[1:], ir_uk['Value'].diff()[1:])
print ("Pearson correlation coefficient and p-value: ", corr_uk)
The p-value roughly indicates the probability of an uncorrelated system producing datasets that have a similar correlation coefficient. Note that the accuracy of the p-value is reduced by the fact that our vectors only have 49 data points each. This is a consequence of having only monthly data, and the fact that it is only a short pre-crisis period we are interested in.
This small p-value suggests that there may be a causal relationship to be found between the variables, as we hypothesised. We can fit a regression model to test aspects of this.
# Hypothesised explanatory variable: the differenced interest rate
x = np.array(ir_uk['Value'].diff()[1:])
# Column-vector form expected by sklearn. Reuse x instead of recomputing
# the diff from scratch as the original did.
x_reshaped = x.reshape((-1, 1))
# Hypothesised response variable: the differenced exchange rate
y = np.array(fx_uk['GBP'].diff()[1:])
# Simple linear regression of exchange-rate changes on rate changes
model = LinearRegression().fit(x_reshaped, y)
print ("R**2: ", model.score(x_reshaped, y))
Around 30% of the variation in the USD/GBP exchange rate can be explained by UK short-term interest rates according to this model.
One way to quickly assess the adequacy of a linear model is by plotting the residuals, i.e. the difference between the actual and predicted y-values:
# Residuals: actual minus fitted differenced exchange rate
residuals_uk = fx_uk['GBP'].diff()[1:] - model.predict(x_reshaped)
plt.plot(residuals_uk)
plt.xticks([])
plt.title('Residuals, UK model')
There is no clear pattern among the residuals, which suggests that the model is not inadequate.
This is used to determine if one time series will be useful to forecast another.
If x causes y, the forecast of y based on previous values of y and previous values of x should outperform the forecast of y based on previous values of y alone.
Null hypothesis: The series in the second column does not Granger cause the series in the first.
# Granger causality: does lagged x (interest-rate changes) improve the
# forecast of y (exchange-rate changes) over lags of y alone?
inp = pd.DataFrame({'x': x, 'y': y})
grangercausalitytests(inp, maxlag=2)
# maxlag specifies how much to lag x and y by (here: lags of 1 and 2 months)
Looking at the p-values, the results are significant to a level below 0.01%.
# Daily USD/JPY exchange rate, Jan 2005 to Jan 2009 inclusive (column 15 = JPY)
fx_jpn = fx.iloc[range(1257, 2282), [0, 15]]
# Collapse to monthly averages over the same window
fx_jpn = fx_jpn.set_index('Date').resample('M').mean()
# Monthly real short-term interest rates, Jan 2005 to Jan 2009 inclusive
# (49 monthly observations; columns 5 and 6 are TIME and Value)
ir_jpn = pd.read_csv("JPN_STrates.csv", usecols=[5, 6]).iloc[:49, :]
# Convert the TIME column into pd.Datetime objects
ir_jpn['TIME'] = pd.to_datetime(ir_jpn['TIME'], format='%Y-%m-%d')
# Same correlation check for the Japanese Yen
corr_jpn = sp.stats.pearsonr(fx_jpn['JPY'].diff()[1:], ir_jpn['Value'].diff()[1:])
print ('Pearson correlation coefficient and p-value: ', corr_jpn)
Results are very different for the Japanese Yen! But this does not cause us to reject our hypothesis, since it is a 'safe-haven' currency.
# Hypothesised explanatory variable: the differenced Japanese interest rate
x_ = np.array(ir_jpn['Value'].diff()[1:])
# Column-vector form expected by sklearn. Reuse x_ instead of recomputing
# the diff from scratch as the original did.
x_reshaped_ = x_.reshape((-1, 1))
# Hypothesised response variable: the differenced USD/JPY exchange rate
y_ = np.array(fx_jpn['JPY'].diff()[1:])
# NOTE(review): this rebinds `model`, shadowing the UK model fitted above —
# consider a distinct name if the UK model is needed in later cells.
model = LinearRegression().fit(x_reshaped_, y_)
# R**2 score
print ("R**2: ", model.score(x_reshaped_, y_))
# Residuals: actual minus fitted differenced exchange rate
plt.plot(fx_jpn['JPY'].diff()[1:] - model.predict(x_reshaped_))
plt.xticks([])
plt.title('Residuals')
# Granger causality with lags of 1 and 2 months, as for the UK
inp_ = pd.DataFrame({'x': x_, 'y': y_})
grangercausalitytests(inp_, maxlag=2)